/* Copyright 2025 The xLLM Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://github.com/jd-opensource/xllm-service/blob/main/LICENSE

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#pragma once

#include "chat_template/jinja_chat_template.h"
#include "common/types.h"
#include "common/xllm/output.h"

namespace xllm_service {

// Plain data holder for everything associated with a single request:
// the inputs (prompt / messages / token ids), routing information,
// bookkeeping counters and the callbacks attached to the request.
struct Request {
  // Name of the model.
  std::string model;

  // Request id generated by the service.
  std::string service_request_id;

  // True when the response should be streamed; off by default.
  bool stream{false};

  // True when usage should be returned; off by default.
  bool include_usage{false};

  // Offline flag; off by default.
  // NOTE(review): presumably marks an offline request -- the exact meaning
  // is not visible in this header, confirm against the callers.
  bool offline{false};

  // Input prompt.
  std::string prompt;

  // Input messages.
  ChatMessages messages;

  // Token ids of the prompt.
  std::vector<int32_t> token_ids;

  // Instance routing.
  Routing routing;

  // Number of tokens generated so far; starts at zero.
  int64_t num_generated_tokens{0};

  // Estimated TTFT obtained from the TTFT predictor; starts at zero.
  // NOTE(review): the unit is not stated in this header -- confirm.
  int64_t estimated_ttft{0};

  // Output callback.
  OutputCallback output_callback;

  // Trace callback taking a string; empty (null) unless one is assigned.
  std::function<void(const std::string&)> trace_callback{nullptr};
};

}  // namespace xllm_service